library(limma)
library(DESeq2)
library(biomaRt)
library(org.Hs.eg.db)
library(readxl)
library(reshape2)
library(ggplot2)
library(ggfortify)
library(ggpubr)
library(caret)
library(steFunctions)
library(EnsDb.Hsapiens.v79)
library(heatmap3)
library(tidyverse)
library(ComplexHeatmap)
library(circlize)
library(doMC)
library(pROC)
#define function to get color gradient
getGradientColors <- function(colValue, minRange = NULL, maxRange = NULL, ccolors = c('green3', 'red'), bbreak = 0.01){
library(heatmap3)
if(is.null(minRange)){
minRange <- min(colValue, na.rm = T) - 0.001
}
if(is.null(maxRange)){
maxRange <- max(colValue, na.rm = T) + 0.001
}
bre <- seq(minRange, maxRange, bbreak)
colori <- colByValue(colValue, col = colorRampPalette(ccolors)(length(bre)-1), breaks= bre, cex.axis = 0.8)
colori <- as.vector(colori)
graphics.off()
names(colori) <- names(colValue)
return(colori)
}
MEDIAsave="/home/mclaudia/MEDIA Project/Codici_finali/"
wdd="/home/mclaudia/MEDIA Project/OAStratification-pipeline/"
MEDIAgraph="/home/mclaudia/MEDIA Project/Codici_finali/Paper_figures/"
load(paste0(MEDIAsave,"genide-bulk-unpaired.RData"),verbose=TRUE) #for TPM trasformation
## Loading objects:
## OAvsnonOA
## dds0
## OAvsnonOAsig
## OAvsnonOA_dup
## gn_dup
up_genes <- read_excel(paste0(MEDIAsave,"up_dw_genes_consensus.xlsx"), sheet=1)
dw_genes <- read_excel(paste0(MEDIAsave,"up_dw_genes_consensus.xlsx"), sheet=2)
ensdf <- GenomicFeatures::genes(EnsDb.Hsapiens.v79, return.type="DataFrame")
ens2gene <- as.data.frame(ensdf[,c("gene_id","gene_name")])
load(paste0(wdd,"data/txi.RData"))
patientDetails<-read.csv(paste0(wdd,"data/patientDetails_all_withMed.csv"),stringsAsFactors = F)
tmp <- txi$length[,match(unique(as.character(patientDetails$name)),colnames(txi$length))]
tmp<-merge(tmp,ens2gene,by.x="row.names",by.y="gene_id")
maintain <-OAvsnonOA[grep("^ENSG.+",OAvsnonOA$Row.names),]
maintain_length <-tmp[tmp$Row.names %in% maintain$Row.names,]
tmp <-tmp[!tmp$Row.names %in% maintain$Row.names,]
dupp <-tmp[duplicated(tmp$gene_name),"gene_name"]
gn_dup <-unique(dupp) #duplicated for filtered genes from the previous analysis
length2 <-as.data.frame(matrix(NA, length(gn_dup),ncol(maintain_length)))
colnames(length2) <-colnames(maintain_length)
rownames(length2) <-paste0("ENS_",1:length(gn_dup))
length2 <-length2[,-c(1,72)]
for(i in 1:length(gn_dup)){
x=gn_dup[i]
y=tmp[tmp$gene_name %in% x,-c(1,72)]
length2[i,1:ncol(length2)] <-apply(y,2,function(x) round(mean(as.numeric(x)),3))
}
length2 <-cbind(Row.names=rownames(length2),length2,gene_name=gn_dup) #fixed length of duplicated genes from maintained 1190
length2 <-length2[length2$gene_name %in% unique(OAvsnonOA_dup$hgnc_symbol),] #filter for duplicated from previous analysis 440
tmp3 <-rbind(maintain_length,as.matrix(length2)) #combine the two matrices
gene_length <-tmp3[,2:(ncol(tmp3)-1)]
gene_length <-t(apply(gene_length,1,as.numeric))
rownames(gene_length) <-tmp3$gene_id
colnames(gene_length) <-colnames(tmp3[,2:(ncol(tmp3)-1)])
rownames(gene_length) <-tmp3$gene_name
cc=assay(dds0)
dim(cc)
## [1] 15296 70
dim(gene_length)
## [1] 15291 70
cc <-cc[OAvsnonOA$Row.names,] #count matrix
gene_length <-gene_length[OAvsnonOA$hgnc_symbol,] #gene length matrix
identical(rownames(gene_length),as.character(OAvsnonOA$hgnc_symbol))
## [1] TRUE
identical(rownames(cc),as.character(OAvsnonOA$Row.names))
## [1] TRUE
x <- cc/gene_length
tpm.mat <- t((t(x) * 1e6)/colSums(x))
rownames(tpm.mat) <-OAvsnonOA$hgnc_symbol
head(colSums(tpm.mat))
## MRI001 MRI002 MRI003 MRI004 MRI005 MRI006
## 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06
train_tpm <-log2(tpm.mat+0.001)
boxplot(train_tpm)
load(paste0(MEDIAsave,"validationDatasetPreprocessed.RData"),verbose = TRUE)
## Loading objects:
## dds
mart <- useMart(biomart = "ensembl", dataset = "hsapiens_gene_ensembl")
tmp=assay(dds)
genes <- rownames(tmp)
bmObj <- getBM(attributes = c("hgnc_symbol", "ensembl_gene_id"),
filters = "hgnc_symbol", values = genes, bmHeader = T, mart = mart)
bmObj <-bmObj[order(bmObj$`Gene stable ID`),]
bmObj <-bmObj[!duplicated(bmObj$`HGNC symbol`),]
ii=intersect(genes,bmObj$`HGNC symbol`)
rownames(bmObj) <-bmObj$`HGNC symbol`
bmObj <-bmObj[ii,]
genes <- genes[genes %in% ii]
identical(genes,bmObj$`HGNC symbol`)
## [1] TRUE
out <- EDASeq::getGeneLengthAndGCContent(id = bmObj$`Gene stable ID`, #using hg19 annotation, download gene length
org = 'hg19',mode = "org.db")
gene_length <-na.omit(out[,1,drop=FALSE])
bmObj <-bmObj[bmObj$`Gene stable ID` %in% rownames(gene_length),]
genes <-genes[genes %in% rownames(bmObj)]
tmp <-tmp[genes,]
identical(rownames(gene_length),bmObj$`Gene stable ID`)
## [1] TRUE
identical(rownames(tmp),bmObj$`HGNC symbol`)
## [1] TRUE
dim(tmp)
## [1] 13916 38
dim(gene_length)
## [1] 13916 1
x <- apply(tmp,2,function(x) x/as.numeric(gene_length[,1]))
tpm.mat2 <- t((t(x) * 1e6)/colSums(x))
head(colSums(tpm.mat2))
## Normal_Cart_10_8 Normal_Cart_2_2 Normal_Cart_3_3 Normal_Cart_4_4
## 1e+06 1e+06 1e+06 1e+06
## Normal_Cart_5_5 Normal_Cart_6_6
## 1e+06 1e+06
validation_tpm <-log2(tpm.mat2+0.001)
boxplot(validation_tpm)
ii=intersect(rownames(train_tpm),rownames(validation_tpm)) #select common genes
train_tpm <-train_tpm[ii,]
validation_tpm <-validation_tpm[ii,]
dim(train_tpm)
## [1] 12702 70
dim(validation_tpm)
## [1] 12702 38
tmp <-t(quantileNormalization(t(train_tpm)))
train_tpm <-tmp
boxplot(train_tpm)
list_ranks <- as.numeric(sort(train_tpm[,1],decreasing=TRUE))
head(list_ranks)
## [1] 15.94666 14.86583 14.46304 14.02892 13.77931 13.56793
list_val <-vector("list",ncol(validation_tpm))
names(list_val) <-colnames(validation_tpm)
list_val <-apply(validation_tpm, 2, function(x) as.list(sort(x,decreasing = TRUE)))
list_valRank <-vector("list",ncol(validation_tpm))
names(list_valRank) <-colnames(validation_tpm)
for(i in 1:length(list_val)){
gg <-names(unlist(list_val[[i]]))
list_valRank[[i]] <-list_ranks
names(list_valRank[[i]]) <- gg
}
normdata2 <-matrix(NA,nrow = nrow(validation_tpm), ncol=ncol(validation_tpm))
rownames(normdata2) <-rownames(validation_tpm)
colnames(normdata2) <-colnames(validation_tpm)
for(i in 1:length(list_valRank)){
tmp <-as.data.frame(list_valRank[[i]])
rownames(tmp) <-names(list_valRank[[i]])
normdata2[,i] <-tmp[rownames(normdata2),]
}
boxplot(normdata2) #Validation re-scaled
boxplot(train_tpm) #Reference
save(validation_tpm,normdata2,train_tpm, file=paste0(MEDIAsave,"reference_validation_tpm_log2_and_qn_fin.RData"))
validation_tpm <-normdata2
validation_tpm <-validation_tpm[rownames(validation_tpm) %in% c(up_genes$gene,dw_genes$gene),] #select consensus genes present
dim(validation_tpm)
## [1] 42 38
train_tpm <-train_tpm[rownames(train_tpm) %in% rownames(validation_tpm),] #quantile reference
dim(train_tpm)
## [1] 42 70
train_tpm_forRidge <- train_tpm
validation_tpm_forRidge <- normdata2
set.seed(7694)
runs <-vector("list",100)
names(runs) <-paste0("run_",1:100)
runs <-lapply(runs,function(x) x <- sample(11:70, 10, replace = FALSE))
subsets <-vector("list",100)
names(subsets) <-paste0("run_",1:100)
for(i in 1:length(subsets)) subsets[[i]] <-train_tpm[,c(1:10,as.numeric(runs[[i]]))] #create subsets
trainCtrl.rpcv <- trainControl(method = "LOOCV", number = 1,
verboseIter = TRUE, returnData = FALSE,
savePredictions = TRUE,
classProbs = TRUE)
ClassBoot=as.factor(c(rep("N",10),rep("OA",10)))
list_tunes <-vector("list",100)
names(list_tunes) <-paste0("tunes_",1:100)
nCores <- 250 #change number of threads if needed
registerDoMC(nCores)
for(i in 1:length(subsets)){
train_dataset <-t(subsets[[i]])
train <- train(train_dataset,
y = ClassBoot,
method = "glmnet",
trControl = trainCtrl.rpcv,
metric = "Accuracy", allowParallel=TRUE,
tuneGrid = expand.grid(alpha = 0.5, #for elastic net, median between 0 and 1
lambda = seq(0.001,1,by = 0.001)))
list_tunes[[i]] <- coef(train$finalModel, train$finalModel$lambdaOpt)
}
save(list_tunes, file=paste0(MEDIAsave,"list_tunes_100_def.RData"))
table_occurrence <-matrix(0,100,length(rownames(train_tpm)))
colnames(table_occurrence) <-rownames(train_tpm)
rownames(table_occurrence) <-paste0("run_",1:100)
for(i in 1:nrow(table_occurrence)){
tmp=list_tunes[[i]][,1][-1]
table_occurrence[i,names(tmp)] <-tmp
}
table_occurrence[table_occurrence != 0] <-1
tbO <-melt(table_occurrence)
ggplot(tbO, aes(y=Var2, x=Var1, fill=value))+
geom_tile()+
theme(axis.text.y = element_text(size=12))+
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1,size=8))+
xlab("Runs")+
ylab("Genes")+
scale_fill_continuous(high = "red", low = "blue")
(selected <-sort(colSums(table_occurrence)[colSums(table_occurrence) >= 50],decreasing = TRUE))
## TNFSF11 THBS3 DNER LOXL3 DYSF HTRA1 ASPN TNNT3
## 100 95 94 94 90 90 87 51
selected1 <-names(selected)
meanvalues <-matrix(NA,100,nrow(validation_tpm))
rownames(meanvalues) <-paste0("run_",1:100)
colnames(meanvalues) <-rownames(train_tpm) #42 genes in common between validation an reference
for(i in 1:nrow(meanvalues)) meanvalues[i,rownames(list_tunes[[i]])[-1]] <-as.numeric(list_tunes[[i]])[-1]
meanvalues <-meanvalues[,selected1]
(cf <-colMeans(meanvalues))
## TNFSF11 THBS3 DNER LOXL3 DYSF HTRA1 ASPN
## 0.13556325 0.11050480 0.14982991 0.06883307 0.07084835 0.04097398 0.02928284
## TNNT3
## 0.02143482
barplot(sort(cf,decreasing = TRUE), col = "dodgerblue", cex.names=1.2) #plot coefficients
save(cf, file=paste0(MEDIAsave,"loadings_scores_8_elasticNet_fin.RData"))
ClassTRfull=as.factor(c(rep("N",10),rep("OA",60)))
score_df <-data.frame(patient=colnames(train_tpm), score=NA, class=ClassTRfull)
train_sc <-t(train_tpm[names(cf),])
sc <-rowSums(t(apply(train_sc,1,function(x) x*cf))) #compute score
rownames(score_df) <-score_df[,1]
score_df <-score_df[names(sc),]
score_df$score <-sc
score_df_train <-score_df
rm(score_df)
colnames(score_df_train)<-c("Patient","Score","Class")
ggplot(score_df_train, aes(x=Score,fill=Class)) +
geom_density(alpha=.7)+
xlab("score R")+
scale_fill_manual(values = c("plum","cyan"))+
theme_bw()
ggplot(score_df_train, aes(y=Score,x=Class, fill=Class))+
geom_boxplot(alpha=.6)+
stat_compare_means(cex=7,label.x=0.75,label.y = 4)+
geom_jitter((aes(colour = Class))) +
ylab("score R")+
scale_fill_manual(values = c("plum","cyan"))+
scale_color_manual(values = c("deeppink","turquoise"))+
theme_bw()
col1 <-getGradientColors(score_df_train$Score, ccolors = c("deepskyblue1","dodgerblue","dodgerblue3","navy"), maxRange = max(score_df_train$Score)+ 0.01, minRange = min(score_df_train$Score)-0.01)
score_df_train$col1 <-col1
score_df_train <-score_df_train[order(score_df_train$Patient),]
score_df_train$col2 <-c(rep("plum",10),rep("cyan",60))
sum(up_genes$gene %in% rownames(train_tpm))
## [1] 37
sum(dw_genes$gene %in% rownames(train_tpm))
## [1] 5
sel <-c(up_genes$gene,dw_genes$gene)[c(up_genes$gene,dw_genes$gene) %in% rownames(train_tpm)] #42
train_tpm <-train_tpm[sel,]
annGenes <-data.frame(genes=sel, col3=c(rep("red",37),rep("blue",5)))
summ=summary(score_df_train$Score)
col_fun = colorRamp2(c(summ[1],summ[2],summ[5],summ[6]), c("#00BFFF","#197CDD","#0F4DB3","#000080"))
score_df_train <-score_df_train[order(score_df_train$Score,decreasing = FALSE),] #order the patients for increasing Score
head(score_df_train)
## Patient Score Class col1 col2
## MRI009 MRI009 1.368077 N #00BFFF plum
## MRI008 MRI008 1.501484 N #04B7FF plum
## MRI002 MRI002 1.566680 N #06B4FF plum
## MRI006 MRI006 1.691381 N #0AAEFF plum
## MRI004 MRI004 1.718341 N #0BACFF plum
## MRI005 MRI005 1.737940 N #0BACFF plum
train_tpm_scal <-t(apply(train_tpm,1,scale))
rownames(train_tpm_scal) <-rownames(train_tpm)
colnames(train_tpm_scal) <-colnames(train_tpm)
train_tpm_scal[train_tpm_scal < -3] <- -3
train_tpm_scal[train_tpm_scal > 3] <- 3
heatmap3(train_tpm_scal[annGenes$genes,rownames(score_df_train)],
Colv=NA,
scale="none",
col=colorRampPalette(c("royalblue", "white", "red1"))(1024),
RowAxisColors = 1,
balanceColor = TRUE,
margins = c(10,10),
ColSideColors = as.matrix(score_df_train[,c(4,5)]),
ColSideLabs = c("score R","Status"),
RowSideColors = annGenes$col3,
RowSideLabs = "Consensus",
cexRow = 1, cexCol = 1)
legend(0.4,0.9,legend=c("OA","Normal"),cex=1.5,
fill=c("cyan","plum"), bty = "n", title = "Class")
legend(0.5,0.9,legend=c("UP","DOWN"),cex=1.5,
fill=c("red","blue"), bty = "n", title = "DE")
draw(Legend(col_fun = col_fun, title = "score R"), x = unit(14, "in"), y = unit(8, "in"))
ClassV=factor(c(rep("N",18),rep("OA",20)))
score_df <-data.frame(patient=colnames(validation_tpm), score=NA, class=ClassV)
val_sc <-t(validation_tpm[names(cf),])
identical(names(cf), colnames(val_sc)) #TRUE
## [1] TRUE
sc <-rowSums(t(apply(val_sc,1,function(x) x*cf)))
rownames(score_df) <-score_df[,1]
score_df <-score_df[names(sc),]
score_df$score <-sc
head(sc)
## Normal_Cart_10_8 Normal_Cart_2_2 Normal_Cart_3_3 Normal_Cart_4_4
## 2.722755 3.005583 2.078916 3.007800
## Normal_Cart_5_5 Normal_Cart_6_6
## 1.948965 2.135283
score_df_val <-score_df
rm(score_df)
colnames(score_df_val)<-c("Patient","Score","Class")
ggplot(score_df_val, aes(x=Score,fill=Class)) +
geom_density(alpha=.7)+
xlab("score R")+
scale_fill_manual(values = c("plum","cyan"))+
theme_bw()
ggplot(score_df_val, aes(y=Score,x=Class, fill=Class))+
geom_boxplot(alpha=.6)+
stat_compare_means(cex=7,label.x=0.75,label.y = 4)+
geom_jitter((aes(colour = Class))) +
ylab("score R")+
scale_fill_manual(values = c("plum","cyan"))+
scale_color_manual(values = c("deeppink","turquoise"))+
theme_bw()
col1 <-getGradientColors(score_df_val$Score, ccolors = c("deepskyblue1","dodgerblue","dodgerblue3","navy"), maxRange = max(score_df_val$Score)+ 0.01, minRange = min(score_df_val$Score)-0.01)
score_df_val$col1 <-col1
score_df_val <-score_df_val[order(score_df_val$Patient),]
score_df_val$col2 <-c(rep("plum",18),rep("cyan",20))
sum(up_genes$gene %in% rownames(validation_tpm))
## [1] 37
sum(dw_genes$gene %in% rownames(validation_tpm))
## [1] 5
validation_tpm <-validation_tpm[sel,]
summ2=summary(score_df_val$Score)
col_fun2 = colorRamp2(c(summ2[1],summ2[2],summ2[5],summ2[6]), c("#00BFFF","#1C88F2","#0D40AB","#000080"))
score_df_val <-score_df_val[order(score_df_val$Score,decreasing = FALSE),] #order the patients for increasing Score
head(score_df_val)
## Patient Score Class col1 col2
## normal_06 normal_06 0.7715032 N #00BFFF plum
## normal_04 normal_04 1.3818032 N #0FA7FF plum
## normal_02 normal_02 1.3919973 N #0FA6FF plum
## normal_10 normal_10 1.5136887 N #12A2FF plum
## Normal_Cart_5_5 Normal_Cart_5_5 1.9489655 N #1D91FF plum
## normal_07 normal_07 2.0107593 N #1D8FFE plum
validation_tpm_scal <-t(apply(validation_tpm,1,scale))
rownames(validation_tpm_scal) <-rownames(validation_tpm)
colnames(validation_tpm_scal) <-colnames(validation_tpm)
validation_tpm_scal[validation_tpm_scal < -3] <- -3
validation_tpm_scal[validation_tpm_scal > 3] <- 3
heatmap3(validation_tpm_scal[annGenes$genes,rownames(score_df_val)],
Colv=NA,
scale="none",
col=colorRampPalette(c("royalblue", "white", "red1"))(1024),
RowAxisColors = 1,
balanceColor = TRUE,
margins = c(10,10),
ColSideColors = as.matrix(score_df_val[,c(4,5)]),
ColSideLabs = c("score R","Status"),
RowSideColors = annGenes$col3,
RowSideLabs = "Consensus",
cexRow = 1, cexCol = 1)
legend(0.4,0.9,legend=c("OA","Normal"),cex=1.5,
fill=c("cyan","plum"), bty = "n", title = "Class")
legend(0.5,0.9,legend=c("UP","DOWN"),cex=1.5,
fill=c("red","blue"), bty = "n", title = "DE")
draw(Legend(col_fun = col_fun, title = "score R"), x = unit(14, "in"), y = unit(8, "in"))
ClassV=as.factor(c(rep("N",18),rep("OA",20)))
score_df_val <-score_df_val[order(score_df_val$Class),]
rocobj <- roc(ClassV,score_df_val$Score)
auc <-round(rocobj$auc,3)
ggroc(rocobj, size = 2, col="dodgerblue",legacy.axes = TRUE) +
theme_bw()+
ggtitle(paste0('ROC Curve for score R on Validation dataset ', '(AUC = ', auc, ')'))+
labs(x = "1 - Specificity",y = "Sensitivity")
rocobj_val <-rocobj
##Train ### Tune the model
validation_tpm <-validation_tpm_forRidge[rownames(validation_tpm_forRidge) %in% c(up_genes$gene,dw_genes$gene),]
train_tpm <-train_tpm_forRidge[rownames(train_tpm_forRidge) %in% rownames(validation_tpm),]
ClassTRfull=as.factor(c(rep("N",10),rep("OA",60)))
trainCtrl.rpcv <- trainControl(method = "LOOCV", number = 1,
verboseIter = TRUE, returnData = FALSE,
savePredictions = TRUE,
classProbs = TRUE)
nCores <- 250 #change number if needed
registerDoMC(nCores)
set.seed(3744)
train_dataset <-t(train_tpm)
train <- train(train_dataset,
y = ClassTRfull,
method = "glmnet",
trControl = trainCtrl.rpcv,
metric = "Accuracy", allowParallel=TRUE,
tuneGrid = expand.grid(alpha = 0, #ridge
lambda = seq(0.001,1,by = 0.001)))
cf_all <-coef(train$finalModel, train$finalModel$lambdaOpt)[,1][-1]
save(cf_all, file=paste0(MEDIAsave,"loadings_scores_all_ridge_fin.RData"))
barplot(sort(cf_all,decreasing = TRUE), col = "dodgerblue",cex.names = 0.6) #plot coefficients
score_df_all_tr <-data.frame(patient=colnames(train_tpm), score=NA, class=ClassTRfull)
train_sc_all <-t(train_tpm[names(cf_all),])
sc1 <-rowSums(t(apply(train_sc_all,1,function(x) x*cf_all)))
rownames(score_df_all_tr) <-score_df_all_tr[,1]
score_df_all_tr <-score_df_all_tr[names(sc1),]
score_df_all_tr$score <-sc1
head(sc1)
## MRI001 MRI002 MRI003 MRI004 MRI005 MRI006
## 7.298994 3.104174 5.247277 4.097568 5.172805 4.335652
score_df_all_val <-data.frame(patient=colnames(validation_tpm), score=NA, class=ClassV)
val_sc_all <-t(validation_tpm[names(cf_all),])
identical(names(cf_all), colnames(val_sc_all)) #TRUE
## [1] TRUE
sc <-rowSums(t(apply(val_sc_all,1,function(x) x*cf_all)))
rownames(score_df_all_val) <-score_df_all_val[,1]
score_df_all_val <-score_df_all_val[names(sc),]
score_df_all_val$score <-sc
score_df_all_val <-score_df_all_val[order(score_df_all_val$score, decreasing=FALSE),]
rocobj_val_all<- roc(score_df_all_val$class,score_df_all_val$score)
auc <-round(rocobj_val_all$auc,3)
ggroc(rocobj_val_all, size = 2, col="dodgerblue", legacy.axes = TRUE) +
theme_bw()+
ggtitle(paste0('ROC Curve for score T on Validation dataset ', '(AUC = ', auc, ')'))+
labs(x = "1 - Specificity",y = "Sensitivity")
## Compare the ROC curves to assess the efficacy of the model
list_rocs <-list(rocobj_val,rocobj_val_all)
names(list_rocs) <-c("Score R - 8 features","Score T - 42 features")
auc <- lapply(list_rocs, function(x) round(x$auc,3))
auc[1]
## $`Score R - 8 features`
## [1] 0.883
auc[2]
## $`Score T - 42 features`
## [1] 0.922
rocts <-roc.test(rocobj_val,rocobj_val_all)
rocts
##
## DeLong's test for two ROC curves
##
## data: rocobj_val and rocobj_val_all
## D = -0.55529, df = 70.778, p-value = 0.5804
## alternative hypothesis: true difference in AUC is not equal to 0
## sample estimates:
## AUC of roc1 AUC of roc2
## 0.8833333 0.9222222
ggroc(list_rocs, size = 2,legacy.axes = TRUE) +
theme_bw()+
annotate("text", 0.75, 0.44, label=expression("AUC 8 features - score s"["R"] : 0.883) ,size=7)+
annotate("text", 0.75,0.4, label=expression("AUC 42 features - score s"["T"] : 0.922) ,size=7)+
labs(x = "1 - Specificity",y = "Sensitivity",color='Models')+
theme(legend.title=element_text(size=16),legend.text=element_text(size=14))+
guides(Models = guide_legend(override.aes = list(size = 4)))
list_scores <-list(score_df_all_tr, score_df_all_val, score_df_train, score_df_val)
names(list_scores) <-c("scoreT Training","scoreT Valid","scoreR Training","scoreR Valid")
save(list_scores,file=paste0(MEDIAsave,"list_scores.RData"))